output: pdf_document
In this work DAE devices distribution is studied. DAE stands to automated external defibrillator, which are useful in case of cardiac emergencies, while time is crucial, so the closeness to one of this devices.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Libraries
# install.packages("tidyverse")
# install.packages("sp")
# install.packages("sf")
# install.packages("rgdal") # DEPRECATED
# install.packages("rmapshaper")
# install.packages("wbstats")
# install.packages("rnaturalearth")
# install.packages("mapview")
# install.packages("spatialreg")
# install.packages("spatstat")
library(sp)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rgeos)
## rgeos version: 0.6-2, (SVN revision 693)
## GEOS runtime version: 3.10.2-CAPI-1.16.0
## Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## GEOS using OverlayNG
## Linking to sp version: 1.6-0
## Polygon checking: TRUE
##
##
## Attaching package: 'rgeos'
##
## The following object is masked from 'package:dplyr':
##
## symdiff
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
library(tidyr)
#library(raster)
#library(rgdal)
dae_full <- read.csv("progetto-dae.csv", sep = ";")
duplicated_record = duplicated(dae_full)
double_record = subset(dae_full, duplicated_record)
print(double_record)
## [1] Nome Città Indirizzo Ubicazione
## [5] Note Orari Telefono Geo.Point
## [9] Quartiere Zona.di.prossimità Area.statistica
## <0 rows> (or 0-length row.names)
NON CI SONO DUPLICATI SE SI CONSIDERANO LE NOTE: ES TECHNOGYM PRESSO PORTINERIA O IN PRODUZIONE, MA SCARTANDO ALCUNE VARIABILI PARE CI SIANO VALORI RIPETUTI, quindi lavorando su dae e non dae_full. Li elimino comunque considerando che sono solo 83 vaori su 5298.
dae <- dae_full[c("Nome", "Città", "Geo.Point")]
head(dae)
duplicated_rows = duplicated(dae)
double_data = subset(dae, duplicated_rows)
print(double_data)
## Nome
## 44 ESSELUNGA S.P.A.
## 45 ESSELUNGA S.P.A.
## 46 ESSELUNGA S.P.A.
## 47 ESSELUNGA S.P.A.
## 48 ESSELUNGA S.P.A.
## 1206 Vertivsrl
## 1386 VILLAGGIO DELLA SALUTE
## 1469 L.F. S.P.A.
## 1511 BORMIOLI LUIGI SPA
## 1519 BARILLA G. E R. FRATELLI S.P.A.
## 1520 BARILLA G. E R. FRATELLI S.P.A.
## 1687 GSK MANUFACTORING SPA
## 1841 ESSELUNGA S.P.A.
## 1842 ESSELUNGA S.P.A.
## 1885 Cattani S.P.A.
## 1890 Agenzia delle Entrate di Parma - Riscossione
## 1980 AGENZIA INTERREGIONALE PER IL FIUME PO
## 2107 SAIB S.P.A.
## 2346 DUERRE TUBI STYLE (DTS 2)
## 2405 Scuola secondaria di 1° grado Borgese
## 2411 Istituto tecnico economico statale Roberto Valturio
## 2412 Istituto tecnico economico statale Roberto Valturio
## 3027 Istituto Alberghiero Savioli Riccione
## 3084 TECNOPRIMAF SRL
## 3125 FIERA
## 3138 CAAB
## 3144 DITTA DATALOGIC
## 3180 POLISPORTIVA MONTEVEGLIO
## 3215 POLISPORTIVA MARTORANO
## 3234 CASSA DI RISPARMIO DI CESENA
## 3248 TECHNOGYM
## 3254 ASSOCIAZIONE CALCIO CESENA
## 3268 BORMIOLI LUIGI SPA
## 3397 DATALOGIC SPA
## 3546 Polisportiva Stella
## 3607 ESSELUNGA S.P.A.
## 3608 ESSELUNGA S.P.A.
## 3609 ESSELUNGA S.P.A.
## 3610 ESSELUNGA S.P.A.
## 3612 FORNOVO GAS S.P.A.
## 3616 INCO SPA
## 3653 Cattani S.P.A.
## 3662 AIA Stabilimento di Capoponte
## 3664 Val Parma Hospital
## 3679 CARPIGIANI
## 3748 AGENZIA INTERREGIONALE PER IL FIUME PO
## 3880 ROSSETTI MARKET S.r.l.
## 3887 FOSSATI SERRAMENTI SRL
## 3889 LA PIZZA+ 1 SPA
## 4030 CLUB VILLAGE E HOTEL SPIAGGIA ROMEA
## 4167 Liceo Scientifico Volta-Fellini
## 4186 Istituto "De Gasperi"
## 4188 Medicina dello Sport AUSL (Colosseo)
## 4206 Palestra Scuola Media Pazzini
## 4213 Stazione Carabinieri di Novafeltria (RN)
## 4375 IMA SPA
## 4386 AUTOMOBILI LAMBORGHINI SPA
## 4388 AUTOMOBILI LAMBORGHINI SPA
## 4607 CENTRO PER L'AUTOTRASPORTO
## 4779 fruttagel
## 4781 farmacia santerno
## 4816 Stadio del nuoto Centro Sportivo Riccione
## 4865 GRANDI SALUMIFICI ITALIANI
## 4877 I.I.S. ANTONIO MEUCCI
## 4884 Arcispazio Piumazzo
## 4923 CAAB
## 4924 CAAB
## 4933 IMA SPA
## 4938 IKEA
## 5001 OROGEL SOC.COOP.AGRICOLA
## 5002 OROGEL SOC.COOP.AGRICOLA
## 5004 CASSA DI RISPARMIO DI CESENA
## 5005 CASSA DI RISPARMIO DI CESENA
## 5006 ASSOCIAZIONE CALCIO CESENA
## 5022 TECHNOGYM
## 5043 BORMIOLI LUIGI SPA
## 5044 BORMIOLI LUIGI SPA
## 5045 BORMIOLI LUIGI SPA
## 5122 ISII MARCONI
## 5144 AZIENDA FLY GLOBAL NET
## 5185 GSK MANUFACTORING SPA
## 5186 GSK MANUFACTORING SPA
## 5195 Colombini Group Spa
## Città Geo.Point
## 44 PARMA 44.849708557128906, 10.367918014526367
## 45 PARMA 44.849708557128906, 10.367918014526367
## 46 PARMA 44.849708557128906, 10.367918014526367
## 47 PARMA 44.849708557128906, 10.367918014526367
## 48 PARMA 44.849708557128906, 10.367918014526367
## 1206 CASTEL GUELFO DI BOLOGNA 44.43602752685547, 11.617171287536621
## 1386 MONTERENZIO 44.31161880493164, 11.460027694702148
## 1469 CESENA 44.15047836303711, 12.21621036529541
## 1511 PARMA 44.82695007324219, 10.326359748840332
## 1519 PARMA 44.823341369628906, 10.381290435791016
## 1520 PARMA 44.823341369628906, 10.381290435791016
## 1687 TORRILE 44.8971061706543, 10.35757827758789
## 1841 PARMA 44.849708557128906, 10.367918014526367
## 1842 PARMA 44.849708557128906, 10.367918014526367
## 1885 PARMA 44.84624481201172, 10.363381385803223
## 1890 PARMA 44.81496047973633, 10.312209129333496
## 1980 PARMA 44.807926177978516, 10.329997062683105
## 2107 CAORSO 45.042388916015625, 9.822153091430664
## 2346 MARANELLO 44.536258697509766, 10.875616073608398
## 2405 RIMINI 44.05144500732422, 12.575874328613281
## 2411 RIMINI 44.04925537109375, 12.58498764038086
## 2412 RIMINI 44.04925537109375, 12.58498764038086
## 3027 RICCIONE 44.00173568725586, 12.64654541015625
## 3084 SAN CESARIO SUL PANARO 44.58967590332031, 11.02854061126709
## 3125 BOLOGNA 44.50872039794922, 11.366950035095215
## 3138 BOLOGNA 44.51411056518555, 11.40820026397705
## 3144 MONTE SAN PIETRO 44.422298431396484, 11.173110008239746
## 3180 VALSAMOGGIA 44.4734992980957, 11.101129531860352
## 3215 CESENA 44.168968200683594, 12.257120132446289
## 3234 CESENA 44.14461135864258, 12.237059593200684
## 3248 CESENA 44.1708984375, 12.277190208435059
## 3254 CESENA 44.12149429321289, 12.184496879577637
## 3268 PARMA 44.82695007324219, 10.326359748840332
## 3397 CALDERARA DI RENO 44.5435905456543, 11.30077838897705
## 3546 RIMINI 44.04840850830078, 12.572135925292969
## 3607 PARMA 44.849708557128906, 10.367918014526367
## 3608 PARMA 44.849708557128906, 10.367918014526367
## 3609 PARMA 44.849708557128906, 10.367918014526367
## 3610 PARMA 44.849708557128906, 10.367918014526367
## 3612 TRAVERSETOLO 44.64885711669922, 10.385416984558105
## 3616 PARMA 44.78468322753906, 10.287612915039062
## 3653 PARMA 44.84624481201172, 10.363381385803223
## 3662 TIZZANO VAL PARMA 44.48647689819336, 10.243917465209961
## 3664 LANGHIRANO 44.617034912109375, 10.268505096435547
## 3679 ANZOLA DELL EMILIA 44.5411491394043, 11.21133041381836
## 3748 PARMA 44.807926177978516, 10.329997062683105
## 3880 ALSENO 44.886844635009766, 9.98708724975586
## 3887 ROTTOFRENO 45.0426139831543, 9.601079940795898
## 3889 PODENZANO 44.991371154785156, 9.690260887145996
## 4030 COMACCHIO 44.78233337402344, 12.250884056091309
## 4167 RICCIONE 44.002685546875, 12.647295951843262
## 4186 MORCIANO DI ROMAGNA 43.91582489013672, 12.64973258972168
## 4188 RIMINI 44.040733337402344, 12.57896900177002
## 4206 VERUCCHIO 44.009498596191406, 12.434885025024414
## 4213 NOVAFELTRIA 43.890804290771484, 12.289532661437988
## 4375 OZZANO DELL EMILIA 44.44227981567383, 11.49416732788086
## 4386 SANT AGATA BOLOGNESE 44.65670394897461, 11.122122764587402
## 4388 SANT AGATA BOLOGNESE 44.65787887573242, 11.126039505004883
## 4607 CESENA 44.130393981933594, 12.234954833984375
## 4779 ALFONSINE 44.51097869873047, 12.042570114135742
## 4781 RAVENNA 44.43552017211914, 12.059149742126465
## 4816 RICCIONE 44.00365447998047, 12.637453079223633
## 4865 MODENA 44.591796875, 10.950569152832031
## 4877 CARPI 44.78599548339844, 10.86845874786377
## 4884 CASTELFRANCO EMILIA 44.55743408203125, 11.062333106994629
## 4923 BOLOGNA 44.51411056518555, 11.40820026397705
## 4924 BOLOGNA 44.51411056518555, 11.40820026397705
## 4933 OZZANO DELL EMILIA 44.442588806152344, 11.48770809173584
## 4938 CASALECCHIO DI RENO 44.487491607666016, 11.251609802246094
## 5001 CESENA 44.167789459228516, 12.216970443725586
## 5002 CESENA 44.17005920410156, 12.21891975402832
## 5004 CESENA 44.14461135864258, 12.237059593200684
## 5005 CESENA 44.14461135864258, 12.237059593200684
## 5006 CESENA 44.14017105102539, 12.260809898376465
## 5022 CESENA 44.1708984375, 12.277190208435059
## 5043 PARMA 44.82695007324219, 10.326359748840332
## 5044 PARMA 44.82695007324219, 10.326359748840332
## 5045 PARMA 44.82695007324219, 10.326359748840332
## 5122 PIACENZA 45.04444122314453, 9.691390037536621
## 5144 CAORSO 45.05611038208008, 9.879170417785645
## 5185 TORRILE 44.8971061706543, 10.35757827758789
## 5186 TORRILE 44.8971061706543, 10.35757827758789
## 5195 CASTELLO SERRAVALLE 43.99053192138672, 12.519020080566406
#print(count(double_data))
dae <- unique(dae)
#head(dae)
count(dae)
unique_cities <- unique(dae$Città)
city_numb = length(unique_cities)
print(city_numb)
## [1] 326
#table(dae$Città)
barplot(table(dae$Città),main='DAE per Municipality')
Since it’s difficult to understand, select the 10 with most and less
DAE.
# Count the number of occurrences of each city
city_counts <- table(dae$Città)
# Sort the cities by their counts in descending order
sorted_cities <- sort(city_counts, decreasing = TRUE)
#print(sorted_cities)
head(sorted_cities, 10)
##
## PIACENZA BOLOGNA PARMA REGGIO NELL EMILIA
## 340 258 253 203
## RIMINI CESENA FERRARA MODENA
## 201 183 148 137
## RAVENNA CESENATICO
## 124 88
tail(sorted_cities, 10)
##
## LAGOSANTO LOIANO MASI TORELLO
## 1 1 1
## MEZZANI - SORBOLO MEZZANI MODIGLIANA MONTESE
## 1 1 1
## MONTIANO ROCCA SAN CASCIANO TORNOLO
## 1 1 1
## ZERBA
## 1
# Extract the names of the top and bottom cities
top_cities <- names(sorted_cities)[1:10]
bottom_cities <- names(sorted_cities)[(length(sorted_cities) - 9):length(sorted_cities)]
#print(top_cities)
#print(bottom_cities)
# Filter the data for the top and bottom cities
top_data <- dae[dae$Città %in% top_cities, ]
bottom_data <- dae[dae$Città %in% bottom_cities, ]
# Barplot for top cities
barplot(table(top_data$Città), main = 'Top 10 Cities with Most DAE', col = "forestgreen", las = 1, cex.names = 0.4)
# Barplot for bottom cities
barplot(table(bottom_data$Città), main = '10 Cities with Less DAE', col = "orangered", las = 2, cex.names = 0.4)
# devide geopoint into latutidue + longitude
dae_coord <- separate(dae,
Geo.Point,
into = c("Latitude", "Longitude"),
sep = ", ")
head(dae_coord)
# Convert the dataset to an 'sf' object and specify CRS
dae_wgs <- st_as_sf(dae_coord,
coords = c("Longitude", "Latitude"),
crs = 4326) # 3857 EPSG code for WGS84 (standard for lat/long)
head(dae_wgs)
#print(dae_wgs)
ggplot(dae_wgs) +
geom_sf(color = "forestgreen", alpha = .5, size = .3) +
labs(title = "Single Regional Register of Defibrillators - AED") +
theme_minimal() +
coord_sf(expand = FALSE)
Map Emilia-Romagna municipalities borders
## import shapefile - SpatialData
municipalities <- sf::st_read("V_COM_GPG_3.shx")
## Reading layer `V_COM_GPG_3' from data source
## `/Users/GianmarcoSantoro/miniconda3/Projects/GeoSpacial/V_COM_GPG_3.shx'
## using driver `ESRI Shapefile'
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 9.198528 ymin: 43.73088 xmax: 12.75532 ymax: 45.13901
## CRS: NA
#print(municipalities)
#summary(municipalities)
# sistema di riferimento geografico: WGS84 - UTM ZONE 32N
# con il pacchetto "sf" si possono convertire i CRS
# Set the correct CRS, EPSG:4326 to WGS84
st_crs(municipalities) <- st_crs("EPSG:4326")
# Convert the dataset to an 'sf' object and specify CRS
municipalities_coord <- st_as_sf(municipalities,
coords = c("Longitude", "Latitude"),
crs = 4326)
print(municipalities_coord)
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 9.198528 ymin: 43.73088 xmax: 12.75532 ymax: 45.13901
## Geodetic CRS: WGS 84
## First 10 features:
## geometry
## 1 MULTIPOLYGON (((10.86582 44...
## 2 MULTIPOLYGON (((12.6792 43....
## 3 MULTIPOLYGON (((9.888571 45...
## 4 MULTIPOLYGON (((9.966079 45...
## 5 MULTIPOLYGON (((10.074 44.9...
## 6 MULTIPOLYGON (((12.14862 43...
## 7 MULTIPOLYGON (((11.43073 44...
## 8 MULTIPOLYGON (((10.75838 44...
## 9 MULTIPOLYGON (((11.80945 44...
## 10 MULTIPOLYGON (((9.447531 44...
#summary(municipalities_coord)
ggplot(municipalities_coord) +
geom_sf(color = "darkgray", alpha = .5, size = .3) +
labs(title = "Municipalities") +
theme_minimal() +
coord_sf(expand = FALSE)
Check of reference systems are same
st_crs(dae_wgs) == st_crs(municipalities_coord)
## [1] TRUE
Map DAE devices and municipalities
# Plot of devices and municipalities
ggplot() +
geom_sf(data = municipalities_coord, color = "darkgray", alpha = 0.5, size = 0.3) +
geom_sf(data = dae_wgs, color = "forestgreen", alpha = 0.5, size = 0.3) +
labs(title = "Defibrillators in Emilia-Romagna") +
theme_minimal() +
coord_sf(expand = FALSE)
We can notice that 2 devices are located in the Adriatic sea, as well as
San Marino’s ones are shown. The ones in the sea can be in islands or
gas platforms or in other off-shore systems.
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 3.1-0
## Loading required package: spatstat.random
## spatstat.random 3.1-4
## Loading required package: spatstat.explore
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## spatstat.explore 3.1-0
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.2-1
## Loading required package: spatstat.linnet
## spatstat.linnet 3.0-6
##
## spatstat 3.0-3
## For an introduction to spatstat, type 'beginner'
library(splancs) # to compare kernel estimations
##
## Spatial Point Pattern Analysis Code in S-Plus
##
## Version 2 - Spatial and Space-Time analysis
##
## Attaching package: 'splancs'
## The following object is masked from 'package:dplyr':
##
## tribble
## The following object is masked from 'package:tidyr':
##
## tribble
## The following object is masked from 'package:tibble':
##
## tribble
head(dae_wgs)
# EPSG:4326 for WGS 84 and EPSG:3003 for Italian projection from Monte Mario reference point
dae_proj1 <- st_transform(dae_wgs, crs = 3003)
print(dae_proj1)
## Simple feature collection with 5215 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 1522642 ymin: 4851522 xmax: 1800810 ymax: 4994375
## Projected CRS: Monte Mario / Italy zone 1
## First 10 features:
## Nome Città
## 1 COOP SOCIALE BUCANEVE BARDI
## 2 Assicurazioni Generali RIMINI
## 3 Bar Dovesi Rimini RIMINI
## 4 Tennis Club Riccione RICCIONE
## 5 Liceo Scientifico Einstein Palestra RIMINI
## 6 CURIA VESCOVILE FORLI
## 7 Stadio del nuoto Centro Sportivo Riccione RICCIONE
## 8 BODY LIFE 2.0 S.S.D.R.L CASTELNUOVO RANGONE
## 9 Chiesa di Sant'Antonio SALSOMAGGIORE TERME
## 10 A.S.D. Bedoniese United BEDONIA
## geometry
## 1 POINT (1557844 4942194)
## 2 POINT (1785075 4881710)
## 3 POINT (1785827 4884659)
## 4 POINT (1791947 4878554)
## 5 POINT (1787171 4883532)
## 6 POINT (1742442 4901098)
## 7 POINT (1791651 4878730)
## 8 POINT (1653887 4935403)
## 9 POINT (1578199 4963594)
## 10 POINT (1550200 4928683)
municipalities_proj <- st_transform(municipalities_coord, crs = 3003)
print(municipalities_proj)
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1515770 ymin: 4846959 xmax: 1801305 ymax: 4998818
## Projected CRS: Monte Mario / Italy zone 1
## First 10 features:
## geometry
## 1 MULTIPOLYGON (((1648239 493...
## 2 MULTIPOLYGON (((1795348 487...
## 3 MULTIPOLYGON (((1570027 498...
## 4 MULTIPOLYGON (((1576002 499...
## 5 MULTIPOLYGON (((1584799 497...
## 6 MULTIPOLYGON (((1753181 485...
## 7 MULTIPOLYGON (((1694092 490...
## 8 MULTIPOLYGON (((1639384 494...
## 9 MULTIPOLYGON (((1723832 491...
## 10 MULTIPOLYGON (((1535488 494...
# Extracting coordinates into separate columns, devide geometry into latutidue + longitude
coordinates <- st_coordinates(dae_proj1)
dae_proj <- cbind(dae_proj1$Nome, dae_proj1$Città, coordinates)
# Rename the columns
colnames(dae_proj) <- c("Nome", "Città", "Longitude", "Latitude")
head(dae_proj)
## Nome Città Longitude
## [1,] "COOP SOCIALE BUCANEVE" "BARDI" "1557844.23150378"
## [2,] "Assicurazioni Generali" "RIMINI" "1785075.14596705"
## [3,] "Bar Dovesi Rimini" "RIMINI" "1785826.93548339"
## [4,] "Tennis Club Riccione" "RICCIONE" "1791946.69377474"
## [5,] "Liceo Scientifico Einstein Palestra" "RIMINI" "1787170.52588819"
## [6,] "CURIA VESCOVILE" "FORLI" "1742442.10838033"
## Latitude
## [1,] "4942193.79963972"
## [2,] "4881710.22510532"
## [3,] "4884658.81470574"
## [4,] "4878553.52190728"
## [5,] "4883531.90980742"
## [6,] "4901097.93272644"
dae_small <- dae_proj
summary(dae_small)
## Nome Città Longitude Latitude
## Length:5215 Length:5215 Length:5215 Length:5215
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
# Ensure dae_small is a dataframe
dae_small <- as.data.frame(dae_small)
# Convert Longitude and Latitude columns to numeric
dae_small$Longitude <- as.numeric(dae_small$Longitude)
dae_small$Latitude <- as.numeric(dae_small$Latitude)
# Round Longitude and Latitude to 5 decimal places
dae_small$Longitude <- round(dae_small$Longitude, 5)
dae_small$Latitude <- round(dae_small$Latitude, 5)
# duplicated_small = duplicated(dae_small)
# double_small = subset(dae_small, duplicated_small)
# print(double_small)
# Remove duplicates
dae_small <- unique(dae_small)
# Check the first few rows to confirm the changes
summary(dae_small)
## Nome Città Longitude Latitude
## Length:5215 Length:5215 Min. :1522642 Min. :4851522
## Class :character Class :character 1st Qu.:1607806 1st Qu.:4917708
## Mode :character Mode :character Median :1656938 Median :4940888
## Mean :1664463 Mean :4937971
## 3rd Qu.:1718146 3rd Qu.:4963326
## Max. :1800810 Max. :4994375
head(dae_small)
Considered rectangular area containing location of devices
x_range <- range(dae_small$Longitude)
y_range <- range(dae_small$Latitude)
xy_area <- owin(x_range, y_range)
# Transform into ppp (point pattern dataset) to show info in the area of interest
dae_ppp = ppp(dae_small$Longitude, dae_small$Latitude, window = xy_area) # warning: duplicated points
## Warning: data contain duplicated points
summary(dae_ppp)
## Planar point pattern: 5215 points
## Average intensity 1.312378e-07 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 1 decimal place
## i.e. rounded to the nearest multiple of 0.1 units
##
## Window: rectangle = [1522641.6, 1800810.1] x [4851522, 4994375] units
## (278200 x 142900 units)
## Window area = 3.9737e+10 square units
# Dato che ci possono essere delle città in cui c'è stato più di un evento,
# "sposto" leggermente i punti "duplicati"
dae_ppp_r <- dae_ppp[!duplicated(dae_ppp)]
dup_j <- rjitter(dae_ppp[duplicated(dae_ppp)], radius=0.01, retry=TRUE, nsim=1, drop=TRUE)
#riaggiungo i punti in locations molto vicine, evitando che i nuovi siano sovrapposti
dae_ppp_j <- superimpose(dae_ppp_r,dup_j)
dae_ppp_originali <- dae_ppp
dae_ppp <- dae_ppp_j
plot(dae_ppp_originali, cols='forestgreen', cex=0.3, main = "Point Pattern Plot")
plot(dup_j, pch=2, cols='brown', add=T, cex=0.6)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.3)
# Create a legend for the plot
legend("bottomleft",
legend = c("ppp", "Duplicates"),
col = c("forestgreen", "brown"),
pch = c(1, 2),
cex = 0.5)
# In genere con i PPP è bene non avere due punti esattamente uguali...
# "when the data has coincidence points, some statistical procedures will be severely affected.
# So it is always strongly advisable to check for duplicate points and to decide on a strategy
# for dealing with them if they are present” (Baddeley et al., 2016: p.60).
Kernel Density Estimate: estimate the intensity of the process, with a kernel estimation
k500 = density(dae_ppp, sigma = 500, dimyx = 500)
plot(k500)
#plot(dae_proj1$geometry, cex=0.01, add=T)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)
We can see in Piacenza, Modena, Parma, Cesena, Rimini some high values,
but still not so informative
Select an optimal bandwidth with Likelihood Cross Validation critirion
# Setting the seed for reproducibility
set.seed(23)
# Calculate the bandwidth for the point pattern
sigma_ppl <- bw.ppl(dae_ppp)
print(sigma_ppl)
## sigma
## 961.3408
kopt = density(dae_ppp, sigma = sigma_ppl, dimyx = 500)
plot(kopt)
#plot(dae_proj1$geometry, col = "forestgreen" , add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)
Try some more values
k2000 = density(dae_ppp, sigma = 2000, dimyx = 500)
plot(k2000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)
k5000 = density(dae_ppp, sigma = 5000, dimyx = 500)
plot(k5000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)
From my point of view this is the most informative plot.
k10000 = density(dae_ppp, sigma = 10000, dimyx = 500)
plot(k10000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)
Save all output as SpatialGrid, for differnt values of sigma in kernel density
SG <- as(k500, "SpatialGridDataFrame")
SG <- cbind(SG,as(kopt, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k2000, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k5000, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k10000, "SpatialGridDataFrame"))
names(SG) <- c("k500","kopt","k2000","k5000","k10000")
With some plots of this kind, k500 and kopt show better granularity of this process
spplot(SG, c("k500","kopt"),col.regions=terrain.colors(11))
# fai una prova con un kernel diverso tipo quadratico tipo qui?
#kq_ppl <- density(casi_ppp,sigma=sigma_ppl,kernel="quartic",xy=griglia)
# Show results
summary(as.data.frame(SG)[,1:5])
## k500 kopt k2000
## Min. :0.000e+00 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.000e+00 Median :1.550e-10 Median :1.759e-08
## Mean :1.312e-07 Mean :1.312e-07 Mean :1.314e-07
## 3rd Qu.:3.930e-09 3rd Qu.:7.578e-08 3rd Qu.:1.212e-07
## Max. :4.276e-05 Max. :2.311e-05 Max. :9.253e-06
## k5000 k10000
## Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:3.646e-10 1st Qu.:8.066e-09
## Median :4.296e-08 Median :6.200e-08
## Mean :1.321e-07 Mean :1.327e-07
## 3rd Qu.:1.585e-07 3rd Qu.:1.924e-07
## Max. :2.405e-06 Max. :1.040e-06
CSR <-runifpoint(dae_ppp$n, win = dae_ppp$window)
plot(dae_ppp, cex=0.1, main= "DAE positions vs Random DAE (red)")
plot(CSR, add=T, col='red', pch=20, cex=0.1)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.5)
There is regular pattern of clusters, differently from the random
representation in red. Clusters are in correspondece to main city
centers and Riviera Romagnola, where in summer there is a significant
increase in people prensence.
Nearest neighbour distance
nns <- nndist(dae_ppp)
#summary(nns)
plot(ecdf(nns)) # empirical cumulative distribution function
It climbs very steeply in the early part of its range before flattening
out, then the indication would be an observed probability of short as
opposed to long nearest neighbour distances, which suggest inter-event
interaction (clustering) In about 5 km quite every DAE has a
neighbor.
Gfunc <- Gest(dae_ppp)
Gfunc2 <- Gest(dae_ppp,correction = "none")
plot(Gfunc,lwd=2)
Might be low informative in this study considering the #equivale alle
G-function senza nessuna correzione per i bordi # climbs very steeply in
the early part of its range before flattening out, then the indication
would # be an observed probability of short as opposed to long nearest
neighbour distances, which suggest inter-event # interaction
(clustering). in caso contrario ci sarebbe stata repulsione (che magari
c’è ad una scala più piccola, # tipo di 1-2 km in caso di presenza di un
altro dispositivo tipo nella stessa strada)
#calculates the nearest neighbor distances for each point in the dae_ppp point pattern, # provides a summary of these distances, and then plots the empirical cumulative distribution function (ECDF) # to visualize the spatial pattern of the points. This helps in understanding whether the points exhibit clustering, # regularity, or randomness in their distribution
#confronto con la distribuzione CSR (= complete spatial randomness = Processo di Poisson omogeneo)
ex <- expression(runifpoint(dae_ppp$n, win = dae_ppp$window))
resG <- envelope(dae_ppp, Gest, nsim = 99, simulate = ex,
verbose = FALSE, saveall = TRUE)
plot(resG)
plot(Gfunc,add=T)
## Funzione F: distanza punto-evento
?Fest
Ffunc <- Fest(dae_ppp)
plot(Ffunc,lwd=2)
#confronto con la distribuzione CSR (= complete spatial randomness = Processo di Poisson omogeneo)
resF <- envelope(dae_ppp, Fest, nsim = 99, simulate = ex,
verbose = FALSE, saveall = TRUE)
plot(resF)
#confronto F_obs - G_obs
#per fare il confronto le funzioni devono essere calcolare per gli stessi valori di distanza
dist <- seq(0,12000, 0.00258)
Foss <- Fest(dae_ppp,r=dist)
Goss <- Gest(dae_ppp,r=dist)
plot(Foss$rs,Goss$rs,xlim=c(0,1),ylim=c(0,1),type='l',xlab="F",ylab="G") #disegno le stime corrette per il bordo (border method or
# "reduced sample" estimator)
abline(a=0,b=1,col='red')
# Anche in questo caso, la linea retta con angolo a 45 gradi indica la CSR, quindi ci aspetteremmo che la
# curva osservata vi si avvicini nel caso in cui il processo fosse random. Invece, sarrebbe sopra la linea in
# caso di clustering (come osserviamo) e sotto in caso di regolarità. In questo caso, il clustering sembra
# piuttosto sostanziale.
## Funzione K
?Kest
Kfunc <- Kest(dae_ppp)
## number of data points exceeds 3000 - computing border correction estimate only
plot(Kfunc,lwd=2)
resK <- envelope(dae_ppp, Kest, nsim = 9, simulate = ex,
verbose = FALSE, saveall = TRUE)
plot(resK)
# La K function ci fornisce la distanza di un evento con ogni singolo altro evento e non solo col suo vicino più vicino.
# Anche qui il clustering è evidente, dato che si discosta di parecchio dalla linea rossa, che è la CSR (poisson).
## Funzione L (trasformazione della Ripley's K-function)
?Lest
Lfunc <- Lest(dae_ppp)
## number of data points exceeds 3000 - computing border correction estimate only
plot(Lfunc,lwd=2)
resL <- envelope(dae_ppp, Lest, nsim = 9, simulate = ex,
verbose = FALSE, saveall = TRUE)
plot(resL)
# Anche in questo caso, L(r) > r implica il clustering, che è ciò che osserviamo.
# Il felsso perchè aumentando la distanza c'è più regolarità?